Pollution is a global problem that takes many forms contaminating every aspect of the environment. Some of this pollution, specifically plastic-waste, ends up in our oceans, harming marine life, contaminating drinking water, and hurting local economies. In response, Ocean Conservancy’s Urban Ocean program has been developing projects that mitigate marine pollution, assess waste management, and enable cities to address ocean plastics and resilience. These projects intend to deploy “zero-waste” pilot strategies in cities around the world. With this wide-spanning outreach and diversity in partnerships, an assessment on site selection and resource allocation becomes a repeated step in the process that hinders efficiency each waste-reduction campaign.
Our objective is to develop a site assessment model that identifies effective zero-waste site locations for national and multinational use. This geospatial risk assessment model serves the purpose of predicting litter accumulation based on globally sourced data with a repeatable framework on an international scale. The results of the model aim to evaluate which sections of a given area have the highest likelihood to produce and contain litter relative to its surroundings. For our case, the Urban Ocean program intends to dedicate “zero-waste” solutions in these areas for the most effective impact for each deployment.
For our model, we directed our focus on data modeling from the Urban Ocean cities. To develop our model for each respective city (or future use), each area was determined by a ‘boundary box’ that was created by a custom KML file using Google My Maps (https://www.google.com/mymaps) or sourced online. Each boundary’s drawing was dictated by political boundaries and practical scope of where data was recorded within the city, further methodology of each individual city is detailed on the interactive dashboard.
In terms of our data & variables, acquiring litter data, our primary environmental health indicator, was acquired via manual download data from Marine Debris Tracker (https://www.debristracker.org/data/). The data for each city was carved out by a boundary box determined by initial-research political boundaries. From there, all debris recorded between January 2021 - February 2024 was compiled to a CSV and used as our primary independent variable. Each selected city required its own evaluation in terms of usable debris data over a several year time span. Ideal locations featured several thousand data points across 2-3 years for early & late stage model development. Despite the scalability of the model itself, results and accuracy lay contingent on data quantity and external factors available to each region.
After the boundary box was created, the data was then projected to a fishnet grid. The fishnet grid was used to count the number of litter points in each cell in a format that can be compared to by each grid cell in the city.
#step to load city's data
litter <- read.csv('https://raw.githubusercontent.com/TrevorKap/MUSA810-Marine-Pollution/main/Data/mdt-dataChennai.csv')
# data filter and projection transformation
litter_p <- litter%>%filter(master_material == 'PLASTIC')%>%
st_as_sf(coords = c("longitude", "latitude"), crs = 4326, agr = "constant")%>%st_transform('EPSG:32643')
#img <- raster("/Users/mr.smile/Desktop/UPENN/Spring24/CPLN790/data/population_ind_pak_general/population_10_lon_80_general-v1.5.tif")
chen_bdry <- st_read('https://github.com/TrevorKap/MUSA810-Marine-Pollution/raw/main/Data/Chennai.kml')
chen_bdry <- st_set_crs(chen_bdry, 4326)%>%st_transform('EPSG:32643')
temp_bd <- st_read('https://github.com/TrevorKap/MUSA810-Marine-Pollution/raw/main/Data/Chennai.kml')
temp_bbox <- get_bbox(temp_bd) # get the bounding box (the projection of temp_bd should be epsg4326)
temp_fish <- create_fish(chen_bdry) # get the fishnet of the city (the projection of chen_bdry should be meter degree)
final_net <- countfishnet(temp_fish, litter_p) # create base fishnet with litter (also the one used as final one)
final_net <- pn_gen(stor_df) # add osm point data and knn calculation result into the final dataset
#temp_point <- raster_process(img,temp_bd) # convert the raster file to point one
#pop_result <- pop_process(temp_point, temp_fish, 32643) # summary the population result
#final_net <- add_pop(pop_result,final_net) # add the pop result into the final dataset
final_net <- moran_gen(final_net,stor_df) # calculate the moran's I result into the dataset
# DONE!
chen_net <- final_netcity_data_list <- lapply(cities, load_city_data)
names(city_data_list) <- cities
bd_data_list <- lapply(cities, load_city_kml)
names(bd_data_list) <- bd
bd_data_meter_list <- lapply(cities, load_city_kml_meter)
names(bd_data_meter_list) <- bdM
for (i in seq_along(city_data_list)) {assign(cities[i], city_data_list[[i]])}
for (i in seq_along(bd_data_list)) {assign(bd[i], bd_data_list[[i]])}
for (i in seq_along(bd_data_meter_list)) {assign(bdM[i], bd_data_meter_list[[i]])}base_path <- "https://raw.githubusercontent.com/TrevorKap/MUSA810-Marine-Pollution/main/Data/stored_city/"
city_names <- c("santa_fe", "semarang", "panama", "can_tho", "melaka", "salvador",
"surat", "santiago", "bangkok", "chennai", "mumbai", "pune")
city_data <- list()
for (city in city_names) {
file_path <- paste0(base_path, city, "_net.geojson")
net_sf <- st_read(file_path)
net_sf <- st_set_crs(net_sf, 32643)
city_data[[city]] <- net_sf
}
list2env(setNames(city_data, paste0("net_", city_names)), envir = .GlobalEnv)cnt_wt <- function(litter,net){
temp <- st_join(litter,net,join = st_within)
temp <- temp %>% filter(!is.na(uniqueID))
temp_sum <- temp %>%
group_by(uniqueID)%>%
summarise(count_un = n_distinct(username))
net <- left_join(net,st_drop_geometry(temp_sum),by = 'uniqueID')
net <- net %>%
mutate(count_un = replace_na(count_un, 0),
count = count/count_un,
count = replace_na(count, 0))
return(net)
}
net_mumbai <- cnt_wt(Mumbai,net_mumbai)
net_chennai <- cnt_wt(Chennai,net_chennai)
net_bangkok <- cnt_wt(Bangkok,net_bangkok)
net_can_tho <- cnt_wt(Can_Tho,net_can_tho)
net_melaka <- cnt_wt(Melaka,net_melaka)
net_panama <- cnt_wt(Panama_City,net_panama)
net_pune <- cnt_wt(Pune,net_pune)
net_salvador <- cnt_wt(Salvador,net_salvador)
net_santa_fe <- cnt_wt(Santa_Fe,net_santa_fe)
net_santiago <- cnt_wt(Santiago,net_santiago)
net_semarang <- cnt_wt(Semarang,net_semarang)
net_surat <- cnt_wt(Surat,net_surat)z_score_normalize <- function(x) {
(x - mean(x)) / sd(x)
}
normal_city <- function(data){
data <- data%>%
mutate(across(where(is.numeric) & -'count', z_score_normalize),
across(where(is.numeric), ~replace(., is.nan(.), 0)))
return(data)
}
net_salvador <- normal_city(net_salvador)
net_santa_fe <- normal_city(net_santa_fe)
net_santiago <- normal_city(net_santiago)
net_semarang <- normal_city(net_semarang)
net_surat <- normal_city(net_surat)
net_bangkok <- normal_city(net_bangkok)
net_can_tho <- normal_city(net_can_tho)
net_chennai <- normal_city(net_chennai)
net_melaka <- normal_city(net_melaka)
net_mumbai <- normal_city(net_mumbai)
net_panama <- normal_city(net_panama)
net_pune <- normal_city(net_pune)net_bangkok <- net_bangkok %>% mutate(city = 'Bangkok',country = 'Thailand')
net_can_tho <- net_can_tho %>% mutate(city = 'Can_Tho',country = 'Vietnam')
net_chennai <- net_chennai %>% mutate(city = 'Chennai',country = 'India')
net_melaka <- net_melaka %>% mutate(city = 'Melaka',country = 'Malaysia')
net_mumbai <- net_mumbai %>% mutate(city = 'Mumbai',country = 'India')
net_panama <- net_panama %>% mutate(city = 'Panama_City',country = 'Panama')
net_pune <- net_pune %>% mutate(city = 'Pune',country = 'India')
net_salvador <- net_salvador %>% mutate(city = 'Salvador',country = 'Brazil')
net_santa_fe <- net_santa_fe %>% mutate(city = 'Santa_Fe',country = 'Argentina')
net_santiago <- net_santiago %>% mutate(city = 'Santiago',country = 'Chile')
net_semarang <- net_semarang %>% mutate(city = 'Semarang',country = 'Indonesia')
net_surat <- net_surat %>% mutate(city = 'Surat',country = 'India')
net_total <- rbind(net_bangkok,net_can_tho,net_chennai,net_melaka,net_mumbai,net_panama,net_pune,net_salvador,net_santa_fe,net_santiago,net_semarang,net_surat) %>% mutate(uniqueID = 1:n())
net_total_li <- net_total %>% dplyr::filter(count != 0)
net_total_li <- net_total_li %>%
mutate(count = (count - mean(count)) / sd(count))The litter data features a wide variety of item categories and general information about each piece. Despite the quality of detail for each recorded sample, this only accounts for litter that has been identified, recorded, and disposed of. This data does not account for litter that was identified but never disposed of, accounted for, or assumed. Regardless, areas where litter was not recorded in a general surrounding could not confidently be assumed to be present or not present. Hence, the purpose of the model is to predict where litter is most likely to accumulate based on the data available.
Each chart below visualizes a dependent variable reformatted for repeatable use in context that relates to its association with litter accumulation. The examples are, restaurants, roads, retail proximity, and ‘significant presence’ of restaurants. These are simply the visualized examples and our variables broadly include land-use, proximity to water, waste site facilities, roads, and other indicators of commercial activity. Our variable selection was based on our hypothesis of areas of human activity leading to higher litter risk. The aim of these variables is to act as proxies for litter cases, providing insight on where litter most likely accumulated or ends up. Each variable is then placed on a combined fishnet grid with the litter data, computed using a Chi-Squared test, and evaluated for association between the variables and litter.
The first map shows the count of restaurants in Chennai, India, similar to the litter data collected, there are few restaurants in any single area, but have tens of each counted throughout that provide insight of the general density/areas of high-activity.
The second map shows the roads in Chennai, India. Roads indicate urban development and density. All forms of data, whether points, lines, or polygons, are represented onto the fishnet grid to allow for comprehensive data in a uniformed format that allows for ease of comparison and analysis.
The third map shows the ‘nearest neighbor’ distance of retail in Santiago, Chile. The nearest neighbor analysis is used to find the closest points of the same type. This allows us to find the proximity of another variant to determine commercial/high activity spots in the area. The nearest neighbor factor was repeated for restaurants, land-use, and water.
Lastly, a significance analysis was conducted on the presence of restaurants in Bangkok, Thailand. The same variables (land-use, water, etc) used for their nearest neighbor factor was also used in their significance aspect. We wanted to examine a ‘significance’ analysis because of their direct implication with high human activity, excess flow of goods, and interconnection of urban systems. Areas of low distances of a “nearest neighbor” imply restaurants are close to one another geographically, indicate density in resources, and identify an urban core.
library(ggplot2)
library(stringr)
map_waste <- visual_count(net_chennai, "waste")
map_waste_nn <- visual_count(net_chennai,'waste_nn')
grid.draw(grobTree(rectGrob(gp=gpar(fill="#ecf6ff", lwd=0, col = "#ecf6ff")),
grid.arrange(map_waste, map_waste_nn, ncol = 2)))map_water <- visual_count(net_chennai,"water")
map_restaurant <- visual_count(net_chennai,"restaurant")
map_road <- visual_count(net_chennai,'road')
map_industrial <- visual_count(net_chennai,'industrial')
map_residential <- visual_count(net_chennai,"residential")
map_retail <- visual_count(net_chennai,'retail')
grid.draw(
grobTree(
rectGrob(gp=gpar(fill="#ecf6ff", lwd=0, col = "#ecf6ff")),
grid.arrange(map_water, map_restaurant, ncol = 2)
)
)grid.draw(
grobTree(
rectGrob(gp=gpar(fill="#ecf6ff", lwd=0, col = "#ecf6ff")),
grid.arrange(map_road, map_industrial, ncol = 2)
)
)grid.draw(
grobTree(
rectGrob(gp=gpar(fill="#ecf6ff", lwd=0, col = "#ecf6ff")),
grid.arrange(map_residential, map_retail, ncol = 2)
)
)map_water_nn <- visual_count(net_chennai,'water_nn')
map_restaurant_nn <- visual_count(net_chennai,"restaurant_nn")
map_road_nn <- visual_count(net_chennai,'road_nn')
map_industrial_nn <- visual_count(net_chennai,'industrial_nn')
map_residential_nn <- visual_count(net_chennai,"residential_nn")
map_retail_nn <- visual_count(net_chennai,'retail_nn')
grid.draw(
grobTree(
rectGrob(gp=gpar(fill="#ecf6ff", lwd=0, col = "#ecf6ff")),
grid.arrange(map_water_nn, map_restaurant_nn, ncol = 2)
)
)grid.draw(
grobTree(
rectGrob(gp=gpar(fill="#ecf6ff", lwd=0, col = "#ecf6ff")),
grid.arrange(map_road_nn, map_industrial_nn, ncol = 2)
)
)grid.draw(
grobTree(
rectGrob(gp=gpar(fill="#ecf6ff", lwd=0, col = "#ecf6ff")),
grid.arrange(map_residential_nn, map_retail_nn, ncol = 2)
)
)The PCA analysis below is used to identify the most important variables in the dataset and their correlation with litter. The reason why we perform PCA is to decrease the dimension of variable. The following result can get explained by the color and direction in the last map. and the last PCA map is the basic for choosing the several variables. Based on the result, the selected variables included ‘waste_sig_dis, restaurant_sig_dis, residential_sig_dis, water_sig_dis, residential_nn, industrial_sig_dis, industrial_sig, restaurant_sig, industrial_nn, road_sig_dis, residential_sig, restaurant_sig, restaurant’ as the ‘shortened model’ independent variables.
corr <- st_drop_geometry(net_total) %>% dplyr::select(!c(uniqueID,cvID,city,country))
corr_nor<- scale(corr)
corr_matrix <- cor(corr_nor)
#ggcorrplot(corr_matrix)
data.pca <- princomp(corr_matrix)
summary(data.pca)## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 1.1544504 0.36523214 0.34046877 0.31029899 0.25846779
## Proportion of Variance 0.6144093 0.06149576 0.05343942 0.04438824 0.03079783
## Cumulative Proportion 0.6144093 0.67590503 0.72934445 0.77373269 0.80453052
## Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
## Standard deviation 0.23518531 0.21725529 0.20707302 0.1916407 0.18403398
## Proportion of Variance 0.02549926 0.02175945 0.01976761 0.0169310 0.01561361
## Cumulative Proportion 0.83002978 0.85178923 0.87155684 0.8884878 0.90410144
## Comp.11 Comp.12 Comp.13 Comp.14 Comp.15
## Standard deviation 0.17324598 0.15353018 0.143181926 0.1332383 0.120928775
## Proportion of Variance 0.01383673 0.01086663 0.009451127 0.0081840 0.006741655
## Cumulative Proportion 0.91793817 0.92880480 0.938255928 0.9464399 0.953181583
## Comp.16 Comp.17 Comp.18 Comp.19
## Standard deviation 0.120295484 0.102914317 0.101110151 0.100044032
## Proportion of Variance 0.006671229 0.004882686 0.004712992 0.004614127
## Cumulative Proportion 0.959852812 0.964735498 0.969448490 0.974062617
## Comp.20 Comp.21 Comp.22 Comp.23
## Standard deviation 0.097015031 0.09618051 0.092972876 0.088041257
## Proportion of Variance 0.004338956 0.00426463 0.003984921 0.003573384
## Cumulative Proportion 0.978401573 0.98266620 0.986651124 0.990224508
## Comp.24 Comp.25 Comp.26 Comp.27
## Standard deviation 0.080715923 0.068107836 0.058459405 0.052915014
## Proportion of Variance 0.003003486 0.002138461 0.001575491 0.001290818
## Cumulative Proportion 0.993227994 0.995366455 0.996941947 0.998232765
## Comp.28 Comp.29 Comp.30 Comp.31
## Standard deviation 0.0409395731 0.035875430 0.0252563016 0.0152463435
## Proportion of Variance 0.0007726696 0.000593337 0.0002940673 0.0001071615
## Cumulative Proportion 0.9990054343 0.999598771 0.9998928385 1.0000000000
## Comp.32
## Standard deviation 2.228071e-09
## Proportion of Variance 2.288576e-18
## Cumulative Proportion 1.000000e+00
## Comp.1 Comp.2
## count 0.03910654 0.02824418
## water 0.06939559 -0.16334301
## water_nn -0.25294001 0.17990491
## waste 0.06006527 -0.06645713
## waste_nn -0.28343700 0.02087865
## restaurant 0.12232184 -0.04222381
## restaurant_nn -0.27714032 -0.08945392
## road 0.16341393 0.41090861
## road_nn -0.13750912 -0.31917998
## industrial 0.04557497 -0.23972791
## industrial_nn -0.12543259 0.22518605
## residential 0.12280232 -0.03065005
## residential_nn -0.16985709 -0.10984955
## retail 0.08343909 -0.10307191
## retail_nn -0.27734971 0.02874484
## avg_pop 0.23916125 0.24137571
## sum_pop 0.25998533 0.29686304
## water_sig 0.09268665 -0.18719624
## water_sig_dis -0.26523016 0.18214397
## waste_sig 0.10033340 -0.10307020
## waste_sig_dis -0.25677004 0.09727076
## restaurant_sig 0.13120168 -0.05896123
## restaurant_sig_dis -0.28062883 -0.01118523
## road_sig 0.05390253 0.23012861
## road_sig_dis -0.09843262 -0.27218982
## industrial_sig 0.05587300 -0.26867581
## industrial_sig_dis -0.15335016 0.23961425
## residential_sig 0.12014661 -0.10190999
## residential_sig_dis -0.25040796 0.03631134
## retail_sig 0.10677570 -0.11630429
## retail_sig_dis -0.23770606 0.03603264
## count_un 0.05246863 0.02625072
fviz_eig(data.pca, addlabels = TRUE) + ggtitle("Eigenvalues from Principal Component Analysis") + plotTheme()n_colors <- 100
mako_gradient <- viridis(n_colors, option = "mako")
fviz_pca_var(data.pca, col.var = "cos2",
gradient.cols = mako_gradient,
repel = TRUE) + plotTheme()The following random forest model utilizes the TidyModels package.
set.seed(123)
net_tt_nor <- st_drop_geometry(net_total_li) %>%
dplyr::select(-c(uniqueID,cvID,city,country))
net_tt_nor$city <- net_total_li$city
net_tt_nor$uniqueID <- net_total_li$uniqueID
net_tt_temp <- net_tt_nor%>%dplyr::select(!c(uniqueID,city))
net_tt_split <- initial_split(net_tt_temp, prop = 0.75)
train_data <- training(net_tt_split)
test_data <- testing(net_tt_split)
rf_model <- rand_forest(
mode = "regression",
trees = tune(),
min_n = tune(),
mtry = tune(),
) %>% set_engine("ranger")
rf_recipe <- recipe(count ~ ., data = train_data)
cv_folds <- vfold_cv(train_data, v = 10)
rf_grid <- rf_grid <- grid_regular(
trees(range = c(500, 2000)),
min_n(range = c(9, 15)),
mtry(range = c(6, 30)),
levels = c(4, 3, 5)
)
rf_workflow <- workflow() %>%
add_model(rf_model) %>%
add_recipe(rf_recipe)
rf_results <- tune_grid(
rf_workflow,
resamples = cv_folds,
grid = rf_grid
)
show_best(rf_results, metric = "rmse")## # A tibble: 5 × 9
## mtry trees min_n .metric .estimator mean n std_err .config
## <int> <int> <int> <chr> <chr> <dbl> <int> <dbl> <chr>
## 1 6 1000 15 rmse standard 0.844 10 0.0862 Preprocessor1_Model10
## 2 6 1500 15 rmse standard 0.845 10 0.0853 Preprocessor1_Model11
## 3 6 500 12 rmse standard 0.846 10 0.0866 Preprocessor1_Model05
## 4 6 1500 9 rmse standard 0.847 10 0.0858 Preprocessor1_Model03
## 5 6 2000 15 rmse standard 0.847 10 0.0852 Preprocessor1_Model12
final_rf <- finalize_workflow(
rf_workflow,
select_best(rf_results, metric = "rmse")
)
final_rf <- last_fit(final_rf, split = net_tt_split )
#results <- collect_metrics(final_rf)
#print(results)
temp_ts <- predict(final_rf$.workflow[[1]], test_data) %>%
rename(Prediction = .pred)
ts_bind <- cbind(test_data,temp_ts)
#best_results <- select_best(rf_results, metric = "rmse")
temp <- predict(final_rf$.workflow[[1]], net_total) %>%
rename(Prediction = .pred)
temp_cat <- risk_level(temp,'kmeans')
df_rf_rst <- cbind(net_total,temp_cat)set.seed(223)
train_control <- trainControl(method = "cv", number = 60)
model <- train(count ~ ., data = train_data, method = "lm",family = "quasi",trControl = train_control)
test_temp <- predict(model,net_total)
test_cbind <- cbind(net_total,test_temp)%>%
rename(Prediction = test_temp)
temp_risk_cat <- st_drop_geometry(risk_level(test_cbind,'kmeans')) %>%dplyr::select(Risk_Category)
test_cbind <- cbind(test_cbind,temp_risk_cat)rf_test <- ts_bind %>% dplyr::select(count,Prediction)
lr_rst <- predict(model,test_data)
lr_test <- cbind(test_data,lr_rst)%>% rename(Prediction = lr_rst) %>% dplyr::select(count,Prediction)
rf_test$Residuals <- rf_test$count - rf_test$Prediction
lr_test$Residuals <- lr_test$count - lr_test$Prediction
ggplot(rf_test, aes(x = count, y = Prediction)) +
geom_point() +
geom_segment(aes(xend = count, yend = Prediction, x = count, y = count),
linetype = "dotted", color = "red") +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "blue") +
labs(x = "Actual Value", y = "Predicted Value", title = "RandomForest Model Residuals") +
plotTheme()ggplot(lr_test, aes(x = count, y = Prediction)) +
geom_point() + # Add points
geom_segment(aes(xend = count, yend = Prediction, x = count, y = count),
linetype = "dotted", color = "red") + # Residual lines
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "blue") +
labs(x = "Actual Value", y = "Predicted Value", title = "Linear Regression Model Residuals") +
plotTheme()mixed_rst <- cbind(lr_test%>%dplyr::select(-Residuals),rf_test$Prediction)%>%
rename(lr_pred = Prediction,
rf_pred = 'rf_test$Prediction') %>%
mutate(Prediction = 0.5*rf_pred + 0.5*lr_pred,
Residuals = count - Prediction)
ggplot(mixed_rst, aes(x = Residuals)) +
geom_histogram(bins = 30, fill = "grey")+
ggtitle("Histogram of Residuals") +
xlab("Values") +
ylab("Frequency") +
plotTheme()grid.arrange(
ggplot(mixed_rst, aes(x = Residuals)) +
geom_histogram(bins = 30, fill = "grey") + plotTheme() ,
ggplot(lr_test, aes(x = Residuals)) +
geom_histogram(bins = 30, fill = "grey") + plotTheme() ,
ggplot(rf_test, aes(x = Residuals)) +
geom_histogram(bins = 30, fill = "grey") + plotTheme() ,nrow = 3)ggplot(mixed_rst, aes(x = count, y = Prediction)) +
geom_point() + # Add points
geom_segment(aes(xend = count, yend = Prediction, x = count, y = count),
linetype = "dotted", color = "red") + # Residual lines
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "blue") +
labs(x = "Actual Value", y = "Predicted Value", title = 'Mixed Model Residuals') +
plotTheme()ggplot(mixed_rst, aes(sample = Residuals)) +
stat_qq() +
stat_qq_line(colour = "red") +
ggtitle("Normal Q-Q Plot") +
xlab("Theoretical Quantiles") +
ylab("Sample Quantiles") +
plotTheme()test_cb_rst <- st_drop_geometry(test_cbind) %>%
dplyr::select(Prediction,Risk_Category)%>%
rename(Pre_lr = Prediction,
Risk_lr = Risk_Category)
rst_total <- cbind(df_rf_rst,test_cb_rst)%>%
rename(Pre_rf = Prediction,
Risk_rf = Risk_Category)
rst_total <- rst_total %>%
mutate(Prediction = 0.4*Pre_rf + 0.6*Pre_lr)
rst_total <- risk_level(rst_total,'kmeans')
grid.draw(
grobTree(
rectGrob(gp=gpar(fill="#ecf6ff", lwd=0, col = "#ecf6ff")),
grid.arrange(
city_viz('Chennai', test_cbind, 'Linear Regression Model'),
city_viz('Chennai', df_rf_rst, 'Random Forest Model'),
nrow = 1
)
)
)grid.draw(
grobTree(
rectGrob(gp=gpar(fill="#ecf6ff", lwd=0, col = "#ecf6ff")),
grid.arrange(city_viz('Chennai', rst_total, 'Mixed Model'), nrow = 1)
)
)risk_v_new <-function(model_data,litter_data,model,city){
model_data$Risk_Category <- as.factor(model_data$Risk_Category)
ggplot() +
geom_sf(data = model_data, aes(fill = Risk_Category), colour = NA) +
geom_sf(data = litter_data, size = .3, colour = "red") +
scale_fill_manual(values = risk_palette, guide = "none") +
labs(title=model) +
mapTheme(title_size = 8)
}
city_viz_new <- function(cities,data,model){
temp <- data %>% filter(city == cities)
risk_v_new(temp,get(cities),model,cities)
}
risk_Chennai <- city_viz_new('Chennai', rst_total, 'Chennai')
risk_Bangkok <- city_viz_new('Bangkok', rst_total, 'Bangkok')
risk_Can_Tho <- city_viz_new('Can_Tho', rst_total, 'Can Tho')
risk_Melaka <- city_viz_new('Melaka', rst_total, 'Melaka')
risk_Mumbai <- city_viz_new('Mumbai', rst_total, 'Mumbai')
risk_Panama_City <- city_viz_new('Panama_City', rst_total, 'Panama City')
risk_Pune <- city_viz_new('Pune', rst_total, 'Pune')
risk_Salvador <- city_viz_new('Salvador', rst_total, 'Salvador')
risk_Santa_Fe <- city_viz_new('Santa_Fe', rst_total, 'Santa Fe')
risk_Santiago <- city_viz_new('Santiago', rst_total, 'Santiago')
risk_Semarang <- city_viz_new('Semarang', rst_total, 'Semarang')
risk_Surat <- city_viz_new('Surat', rst_total, 'Surat')
grid.draw(
grobTree(
rectGrob(gp=gpar(fill="#ecf6ff", lwd=0, col = "#ecf6ff")),
grid.arrange(
risk_Chennai, risk_Bangkok, risk_Can_Tho, risk_Melaka, risk_Mumbai,
risk_Panama_City, nrow = 2
)
)
)grid.draw(
grobTree(
rectGrob(gp=gpar(fill="#ecf6ff", lwd=0, col = "#ecf6ff")),
grid.arrange(
risk_Pune, risk_Salvador, risk_Santa_Fe, risk_Santiago, risk_Semarang, risk_Surat,
nrow = 2
)
)
)Data Sources *
Image Sources *
prettydoc::html_pretty: theme: leonids